# # -*- coding: utf-8 -*-
# import random
# import scrapy
# from scrapy import Request
# from zc_core.dao.item_data_dao import ItemDataDao
# from crecgec.rules import *
# from zc_core.dao.sku_pool_dao import SkuPoolDao
# from zc_core.dao.batch_dao import BatchDao
# from zc_core.util.batch_gen import time_to_batch_no
# from zc_core.util.done_filter import DoneFilter
# from datetime import datetime
#
#
# class FullSpider(BaseSpider):
#     name = 'full_catalog'
#
#     item_url = 'https://mall.crecgec.com/search/goodsDetail/{}/{}/{}.html'
#
#     def __init__(self, batchNo=None, *args, **kwargs):
#         super(FullSpider, self).__init__(*args, **kwargs)
#         if not batchNo:
#             self.batch_no = time_to_batch_no(datetime.now())
#         else:
#             self.batch_no = int(batchNo)
#         # Create the batch record
#         BatchDao().create_batch(self.batch_no)
#         # Avoid collecting the same item twice
#         self.done_filter = DoneFilter(self.batch_no)
#         # self.batch_no = 20210713
#
#     def start_requests(self):
#
#         pool_list = ItemDataDao().get_batch_data_list(self.batch_no, query={
#             "catalog2Name": {"$exists": None}
#         }, fields={"_id": 1, "catalog3Id": 1, "catalog2Id": 1, "batchNo": 1})
#         self.logger.info('全量：%s' % (len(pool_list)))
#         random.shuffle(pool_list)
#         for sku in pool_list:
#             _id = sku.get('_id')
#             catalog3Id = sku.get('catalog3Id')
#             offline_time = sku.get('offlineTime', 0)
#             batch_no = sku.get('batchNo')
#             settings = get_project_settings()
#             # if offline_time > settings.get('MAX_OFFLINE_TIME', 2):
#             #     self.logger.info('忽略: [%s][%s]', sku, offline_time)
#             #     continue
#             # Avoid collecting the same item twice
#             # if self.done_filter.contains(_id) and not settings.get('FORCE_RECOVER', False):
#             #     self.logger.info('已采：[%s]', _id)
#             #     continue
#             # Crawl the item
#             yield Request(
#                 url=self.item_url.format(_id.split('_')[0], _id.split('_')[1], catalog3Id),
#                 callback=self.parse_content_data,
#                 errback=self.error_back,
#                 priority=260,
#                 meta={
#                     'reqType': 'item',
#                     'batchNo': self.batch_no,
#                     'skuId': _id,
#                     'catalog3Id': catalog3Id,
#                 },
#             )
#
#     # Handle ItemData
#     def parse_content_data(self, response):
#         # Process the item detail page
#         data = full_catalog(response)
#         if data:
#             self.logger.info('商品: [%s]' % data.get('skuId'))
#             yield data
#

